From eefccca99ed9ea22cad686d3fddfd327c2dbc877 Mon Sep 17 00:00:00 2001 From: "kaf24@firebug.cl.cam.ac.uk" Date: Sat, 25 Feb 2006 17:58:37 +0100 Subject: [PATCH] New VCPUOP_get_runstate_info hypercall. Returns information about the current run state of a VCPU (running, runnable, blocked, etc.) and the total time spent in each state since the VCPU was created. Signed-off-by: Keir Fraser --- xen/common/dom0_ops.c | 9 ++- xen/common/domain.c | 13 ++++ xen/common/keyhandler.c | 6 -- xen/common/sched_bvt.c | 11 ++- xen/common/sched_sedf.c | 8 +- xen/common/schedule.c | 146 ++++++++++++++++--------------------- xen/include/public/vcpu.h | 34 +++++++++ xen/include/xen/sched-if.h | 6 -- xen/include/xen/sched.h | 12 +-- 9 files changed, 130 insertions(+), 115 deletions(-) diff --git a/xen/common/dom0_ops.c b/xen/common/dom0_ops.c index 003a47b620..e7e725f110 100644 --- a/xen/common/dom0_ops.c +++ b/xen/common/dom0_ops.c @@ -46,6 +46,7 @@ static void getdomaininfo(struct domain *d, dom0_getdomaininfo_t *info) struct vcpu *v; u64 cpu_time = 0; int flags = DOMFLAGS_BLOCKED; + struct vcpu_runstate_info runstate; info->domain = d->domain_id; info->nr_online_vcpus = 0; @@ -55,7 +56,8 @@ static void getdomaininfo(struct domain *d, dom0_getdomaininfo_t *info) * - domain is marked as running if any of its vcpus is running */ for_each_vcpu ( d, v ) { - cpu_time += v->cpu_time; + vcpu_runstate_get(v, &runstate); + cpu_time += runstate.time[RUNSTATE_running]; info->max_vcpu_id = v->vcpu_id; if ( !test_bit(_VCPUF_down, &v->vcpu_flags) ) { @@ -497,6 +499,7 @@ long do_dom0_op(struct dom0_op *u_dom0_op) { struct domain *d; struct vcpu *v; + struct vcpu_runstate_info runstate; ret = -ESRCH; if ( (d = find_domain_by_id(op->u.getvcpuinfo.domain)) == NULL ) @@ -510,10 +513,12 @@ long do_dom0_op(struct dom0_op *u_dom0_op) if ( (v = d->vcpu[op->u.getvcpuinfo.vcpu]) == NULL ) goto getvcpuinfo_out; + vcpu_runstate_get(v, &runstate); + op->u.getvcpuinfo.online = !test_bit(_VCPUF_down, 
&v->vcpu_flags); op->u.getvcpuinfo.blocked = test_bit(_VCPUF_blocked, &v->vcpu_flags); op->u.getvcpuinfo.running = test_bit(_VCPUF_running, &v->vcpu_flags); - op->u.getvcpuinfo.cpu_time = v->cpu_time; + op->u.getvcpuinfo.cpu_time = runstate.time[RUNSTATE_running]; op->u.getvcpuinfo.cpu = v->processor; op->u.getvcpuinfo.cpumap = 0; memcpy(&op->u.getvcpuinfo.cpumap, diff --git a/xen/common/domain.c b/xen/common/domain.c index 749d7c7fe9..598d7e1b69 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -451,6 +451,19 @@ long do_vcpu_op(int cmd, int vcpuid, void *arg) case VCPUOP_is_up: rc = !test_bit(_VCPUF_down, &v->vcpu_flags); break; + + case VCPUOP_get_runstate_info: + { + struct vcpu_runstate_info runstate; + vcpu_runstate_get(v, &runstate); + if ( copy_to_user(arg, &runstate, sizeof(runstate)) ) + rc = -EFAULT; + break; + } + + default: + rc = -ENOSYS; + break; } return rc; diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c index 1b06e0dc3a..9f7b774ea5 100644 --- a/xen/common/keyhandler.c +++ b/xen/common/keyhandler.c @@ -169,8 +169,6 @@ static void dump_domains(unsigned char key) } extern void dump_runq(unsigned char key); -extern void print_sched_histo(unsigned char key); -extern void reset_sched_histo(unsigned char key); #ifndef NDEBUG extern void audit_domains_key(unsigned char key); #endif @@ -206,10 +204,6 @@ void initialize_keytable(void) 'd', dump_registers, "dump registers"); register_keyhandler( 'h', show_handlers, "show this message"); - register_keyhandler( - 'l', print_sched_histo, "print sched latency histogram"); - register_keyhandler( - 'L', reset_sched_histo, "reset sched latency histogram"); register_keyhandler( 'q', dump_domains, "dump domain (and guest debug) info"); register_keyhandler( diff --git a/xen/common/sched_bvt.c b/xen/common/sched_bvt.c index 9996e8bfac..4b29cf1f22 100644 --- a/xen/common/sched_bvt.c +++ b/xen/common/sched_bvt.c @@ -132,13 +132,13 @@ static void unwarp_timer_fn(void *data) 
vcpu_schedule_unlock_irq(v); } -static inline u32 calc_avt(struct vcpu *d, s_time_t now) +static inline u32 calc_avt(struct vcpu *v, s_time_t now) { u32 ranfor, mcus; - struct bvt_dom_info *inf = BVT_INFO(d->domain); - struct bvt_vcpu_info *einf = EBVT_INFO(d); + struct bvt_dom_info *inf = BVT_INFO(v->domain); + struct bvt_vcpu_info *einf = EBVT_INFO(v); - ranfor = (u32)(now - d->lastschd); + ranfor = (u32)(now - v->runstate.state_entry_time); mcus = (ranfor + MCU - 1)/MCU; return einf->avt + mcus * inf->mcu_advance; @@ -262,7 +262,7 @@ static void bvt_wake(struct vcpu *v) curr_evt = calc_evt(curr, calc_avt(curr, now)); /* Calculate the time the current domain would run assuming the second smallest evt is of the newly woken domain */ - r_time = curr->lastschd + + r_time = curr->runstate.state_entry_time + ((einf->evt - curr_evt) / BVT_INFO(curr->domain)->mcu_advance) + ctx_allow; @@ -558,7 +558,6 @@ static void bvt_dump_cpu_state(int i) printk("%3d: %u has=%c ", loop++, v->domain->domain_id, test_bit(_VCPUF_running, &v->vcpu_flags) ? 'T':'F'); bvt_dump_runq_el(v); - printk("c=0x%X%08X\n", (u32)(v->cpu_time>>32), (u32)v->cpu_time); printk(" l: %p n: %p p: %p\n", &vcpu_inf->run_list, vcpu_inf->run_list.next, vcpu_inf->run_list.prev); diff --git a/xen/common/sched_sedf.c b/xen/common/sched_sedf.c index d7a68262b1..38594e0461 100644 --- a/xen/common/sched_sedf.c +++ b/xen/common/sched_sedf.c @@ -1408,18 +1408,14 @@ static void sedf_dump_domain(struct vcpu *d) { printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id, test_bit(_VCPUF_running, &d->vcpu_flags) ? 'T':'F'); - printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64 + printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu" " sc=%i xtr(%s)=%"PRIu64" ew=%hu", EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs, - EDOM_INFO(d)->weight, d->cpu_time, + EDOM_INFO(d)->weight, EDOM_INFO(d)->score[EXTRA_UTIL_Q], (EDOM_INFO(d)->status & EXTRA_AWARE) ? 
"yes" : "no", EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight); - if ( d->cpu_time != 0 ) - printf(" (%"PRIu64"%%)", (EDOM_INFO(d)->extra_time_tot * 100) - / d->cpu_time); - #ifdef SEDF_STATS if ( EDOM_INFO(d)->block_time_tot != 0 ) printf(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) / diff --git a/xen/common/schedule.c b/xen/common/schedule.c index b10c7ac239..9482447043 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -36,14 +36,6 @@ extern void arch_getdomaininfo_ctxt(struct vcpu *, static char opt_sched[10] = "sedf"; string_param("sched", opt_sched); -/*#define WAKE_HISTO*/ -/*#define BLOCKTIME_HISTO*/ -#if defined(WAKE_HISTO) -#define BUCKETS 31 -#elif defined(BLOCKTIME_HISTO) -#define BUCKETS 200 -#endif - #define TIME_SLOP (s32)MICROSECS(50) /* allow time to slip a bit */ /* Various timer handlers. */ @@ -73,6 +65,36 @@ static struct scheduler ops; /* Per-CPU periodic timer sends an event to the currently-executing domain. */ static struct timer t_timer[NR_CPUS]; +static inline void vcpu_runstate_change( + struct vcpu *v, int new_state, s_time_t new_entry_time) +{ + ASSERT(v->runstate.state != new_state); + ASSERT(spin_is_locked(&schedule_data[v->processor].schedule_lock)); + + v->runstate.time[v->runstate.state] += + new_entry_time - v->runstate.state_entry_time; + v->runstate.state_entry_time = new_entry_time; + v->runstate.state = new_state; +} + +void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate) +{ + if ( likely(v == current) ) + { + /* Fast lock-free path. 
*/
+        memcpy(runstate, &v->runstate, sizeof(*runstate));
+        ASSERT(runstate->state == RUNSTATE_running);
+        runstate->time[RUNSTATE_running] += NOW() - runstate->state_entry_time;
+    }
+    else
+    {
+        vcpu_schedule_lock_irq(v);
+        memcpy(runstate, &v->runstate, sizeof(*runstate));
+        runstate->time[runstate->state] += NOW() - runstate->state_entry_time;
+        vcpu_schedule_unlock_irq(v);
+    }
+}
+
 struct domain *alloc_domain(void)
 {
     struct domain *d;
@@ -119,6 +141,9 @@ struct vcpu *alloc_vcpu(
     v->cpu_affinity = is_idle_domain(d) ?
                       cpumask_of_cpu(cpu_id) : CPU_MASK_ALL;
 
+    v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
+    v->runstate.state_entry_time = NOW();
+
     if ( (vcpu_id != 0) && !is_idle_domain(d) )
         set_bit(_VCPUF_down, &v->vcpu_flags);
 
@@ -165,8 +190,15 @@ void vcpu_sleep_nosync(struct vcpu *v)
     unsigned long flags;
 
     vcpu_schedule_lock_irqsave(v, flags);
+
     if ( likely(!vcpu_runnable(v)) )
+    {
+        if ( v->runstate.state == RUNSTATE_runnable )
+            vcpu_runstate_change(v, RUNSTATE_offline, NOW());
+
         SCHED_OP(sleep, v);
+    }
+
     vcpu_schedule_unlock_irqrestore(v, flags);
 
     TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id);
@@ -187,11 +219,19 @@ void vcpu_wake(struct vcpu *v)
     unsigned long flags;
 
     vcpu_schedule_lock_irqsave(v, flags);
+
     if ( likely(vcpu_runnable(v)) )
     {
+        if ( v->runstate.state >= RUNSTATE_blocked )
+            vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
         SCHED_OP(wake, v);
-        v->wokenup = NOW();
     }
+    else if ( !test_bit(_VCPUF_blocked, &v->vcpu_flags) )
+    {
+        if ( v->runstate.state == RUNSTATE_blocked )
+            vcpu_runstate_change(v, RUNSTATE_offline, NOW());
+    }
+
     vcpu_schedule_unlock_irqrestore(v, flags);
 
     TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
@@ -376,8 +416,6 @@ static void __enter_scheduler(void)
 
     stop_timer(&schedule_data[cpu].s_timer);
 
-    prev->cpu_time += now - prev->lastschd;
-
     /* get policy-specific decision on scheduling... 
*/ next_slice = ops.do_schedule(now); @@ -386,8 +424,6 @@ static void __enter_scheduler(void) schedule_data[cpu].curr = next; - next->lastschd = now; - set_timer(&schedule_data[cpu].s_timer, now + r_time); if ( unlikely(prev == next) ) @@ -397,38 +433,23 @@ static void __enter_scheduler(void) } TRACE_2D(TRC_SCHED_SWITCH_INFPREV, - prev->domain->domain_id, now - prev->lastschd); + prev->domain->domain_id, + now - prev->runstate.state_entry_time); TRACE_3D(TRC_SCHED_SWITCH_INFNEXT, - next->domain->domain_id, now - next->wokenup, r_time); + next->domain->domain_id, + (next->runstate.state == RUNSTATE_runnable) ? + (now - next->runstate.state_entry_time) : 0, + r_time); - /* - * Logic of wokenup field in domain struct: - * Used to calculate "waiting time", which is the time that a domain - * spends being "runnable", but not actually running. wokenup is set - * set whenever a domain wakes from sleeping. However, if wokenup is not - * also set here then a preempted runnable domain will get a screwed up - * "waiting time" value next time it is scheduled. - */ - prev->wokenup = now; + ASSERT(prev->runstate.state == RUNSTATE_running); + vcpu_runstate_change( + prev, + (test_bit(_VCPUF_blocked, &prev->vcpu_flags) ? RUNSTATE_blocked : + (vcpu_runnable(prev) ? 
RUNSTATE_runnable : RUNSTATE_offline)), + now); -#if defined(WAKE_HISTO) - if ( !is_idle_vcpu(next) && next->wokenup ) - { - ulong diff = (ulong)(now - next->wokenup); - diff /= (ulong)MILLISECS(1); - if (diff <= BUCKETS-2) schedule_data[cpu].hist[diff]++; - else schedule_data[cpu].hist[BUCKETS-1]++; - } - next->wokenup = (s_time_t)0; -#elif defined(BLOCKTIME_HISTO) - prev->lastdeschd = now; - if ( !is_idle_vcpu(next) ) - { - ulong diff = (ulong)((now - next->lastdeschd) / MILLISECS(10)); - if (diff <= BUCKETS-2) schedule_data[cpu].hist[diff]++; - else schedule_data[cpu].hist[BUCKETS-1]++; - } -#endif + ASSERT(next->runstate.state != RUNSTATE_running); + vcpu_runstate_change(next, RUNSTATE_running, now); ASSERT(!test_bit(_VCPUF_running, &next->vcpu_flags)); set_bit(_VCPUF_running, &next->vcpu_flags); @@ -568,47 +589,6 @@ void dump_runq(unsigned char key) local_irq_restore(flags); } -#if defined(WAKE_HISTO) || defined(BLOCKTIME_HISTO) - -void print_sched_histo(unsigned char key) -{ - int i, j, k; - for_each_online_cpu ( k ) - { - j = 0; - printf ("CPU[%02d]: scheduler latency histogram (ms:[count])\n", k); - for ( i = 0; i < BUCKETS; i++ ) - { - if ( schedule_data[k].hist[i] != 0 ) - { - if ( i < BUCKETS-1 ) - printk("%2d:[%7u] ", i, schedule_data[k].hist[i]); - else - printk(" >:[%7u] ", schedule_data[k].hist[i]); - if ( !(++j % 5) ) - printk("\n"); - } - } - printk("\n"); - } - -} - -void reset_sched_histo(unsigned char key) -{ - int i, j; - for ( j = 0; j < NR_CPUS; j++ ) - for ( i=0; i < BUCKETS; i++ ) - schedule_data[j].hist[i] = 0; -} - -#else - -void print_sched_histo(unsigned char key) { } -void reset_sched_histo(unsigned char key) { } - -#endif - /* * Local variables: * mode: C diff --git a/xen/include/public/vcpu.h b/xen/include/public/vcpu.h index fa9537f1fe..8a425b57da 100644 --- a/xen/include/public/vcpu.h +++ b/xen/include/public/vcpu.h @@ -51,6 +51,40 @@ /* Returns 1 if the given VCPU is up. 
*/
 #define VCPUOP_is_up                3
 
+/*
+ * Return information about the state and running time of a VCPU.
+ * @extra_arg == pointer to vcpu_runstate_info structure.
+ */
+#define VCPUOP_get_runstate_info    4
+typedef struct vcpu_runstate_info {
+    /* VCPU's current state (RUNSTATE_*). */
+    int      state;
+    /* When was current state entered (system time, ns)? */
+    uint64_t state_entry_time;
+    /*
+     * Time spent in each RUNSTATE_* (ns). The sum of these times is
+     * guaranteed not to drift from system time.
+     */
+    uint64_t time[4];
+} vcpu_runstate_info_t;
+
+/* VCPU is currently running on a physical CPU. */
+#define RUNSTATE_running  0
+
+/* VCPU is runnable, but not currently scheduled on any physical CPU. */
+#define RUNSTATE_runnable 1
+
+/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */
+#define RUNSTATE_blocked  2
+
+/*
+ * VCPU is not runnable, but it is not blocked.
+ * This is a 'catch all' state for things like hotplug and pauses by the
+ * system administrator (or for critical sections in the hypervisor).
+ * RUNSTATE_blocked dominates this state (it is the preferred state). 
+ */ +#define RUNSTATE_offline 3 + #endif /* __XEN_PUBLIC_VCPU_H__ */ /* diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h index d61d5c70d3..0317d9433f 100644 --- a/xen/include/xen/sched-if.h +++ b/xen/include/xen/sched-if.h @@ -8,9 +8,6 @@ #ifndef __XEN_SCHED_IF_H__ #define __XEN_SCHED_IF_H__ -#define BUCKETS 10 -/*300*/ - struct schedule_data { spinlock_t schedule_lock; /* spinlock protecting curr */ struct vcpu *curr; /* current task */ @@ -18,9 +15,6 @@ struct schedule_data { void *sched_priv; struct timer s_timer; /* scheduling timer */ unsigned long tick; /* current periodic 'tick' */ -#ifdef BUCKETS - u32 hist[BUCKETS]; /* for scheduler latency histogram */ -#endif } __cacheline_aligned; extern struct schedule_data schedule_data[]; diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index 0d265badb8..f6ab18b31b 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -63,15 +64,13 @@ struct vcpu struct vcpu *next_in_list; - struct timer timer; /* one-shot timer for timeout values */ + struct timer timer; /* one-shot timer for timeout values */ unsigned long sleep_tick; /* tick at which this vcpu started sleep */ - s_time_t lastschd; /* time this domain was last scheduled */ - s_time_t lastdeschd; /* time this domain was last descheduled */ - s_time_t cpu_time; /* total CPU time received till now */ - s_time_t wokenup; /* time domain got woken up */ void *sched_priv; /* scheduler-specific data */ + struct vcpu_runstate_info runstate; + unsigned long vcpu_flags; u16 virq_to_evtchn[NR_VIRQS]; @@ -397,7 +396,6 @@ extern struct domain *domain_list; #define _DOMF_debugging 4 #define DOMF_debugging (1UL<<_DOMF_debugging) - static inline int vcpu_runnable(struct vcpu *v) { return ( (atomic_read(&v->pausecnt) == 0) && @@ -415,6 +413,8 @@ void cpu_init(void); int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity); +void 
vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate); + static inline void vcpu_unblock(struct vcpu *v) { if ( test_and_clear_bit(_VCPUF_blocked, &v->vcpu_flags) ) -- 2.30.2